#install.packages("rlang")
#library(rlang)
library(tidyverse)
library(haven)
library(formatR)
library(lubridate)
library(smooth)
library(forecast)
library(scales)

library(ggplot2)
library(readxl)
library(tidyverse)
library(data.table)
library(quantmod)
library(geofacet)
library(janitor)


knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE
                      )

Inspecting new FY22 data files

Data files for closed years have been obtained from IOC. Source spending data is at the fund-agency-object level and source revenue data is at the fund-agency-source level.

Revenue File:
- 684 Fund Numbers
- 1185 Revenue sources
- 80 Agencies

Expenditure File:
- 708 Funds
- 107 Agencies - 98 Division Numbers, 313 Division names

Look for cross year differences in the availability of transfer-in and transfer-out information

Codebook

Using the comptrollers variables:

Detail Object is a department classification used by the State to group expenses.

Object: e.g. 1100 - Personal Services; 880 - Debt Service; 8813 - Current Maturities-Interest

Group: RE-reimbursements; TR-Transfers; 9-Other

Category: T - Taxable Bond Fund Payments, 4 - Nonprofit organizations grants; 7 - Personal Services Related

Type: T - Statutory Transfers; 1 - Operations; 6 - Permanent Improvements; 8-Debt Service; 9-Refunds

Class: ex. 402 - Income Tax Refunds, 407 - Sales Tax Refunds

Appropriation Category: 8800 - Debt Service; 1129 - Employee Retirement Paid State

Data Creation and Cleaning

  • Do the FOIA request
    1. In a week or so, they send the expenditure and revenue data as excel files.
    1. Checks whether there are any new agencies, re-used funds etc. Create a list of funds, agencies, fund names, etc. for the new year and compare it to the immeditate prior year to identify new funds.
    1. Update the funds_ab_in file which shows the use of funds. Use criteria to determine if the new funds should be in or out of the all-funds frame.
    1. Then, download the excel files that are sent to you.
    1. Open and change the names to be consistent with other files such as AGENCYNAME–> agency_name
    1. Then, make the expenditure and receipts “numbers”, not “general”.
    1. Save them and import to Stata.

Combine past years: All revenue files are in a revenue folder that I reference when I set the working directory. When adding new fiscal years, put the the newest year of data for revenue and expenditures in their respective folders.

Pre-FY2022

The code below chunk takes the .dta files for all fiscal years before FY 2022 and binds them together. Variable names were manually changed by past researchers so that they were consistent across years.

  • Additional variables are created: object, category, sequence, type, trans_agency, trans_type

  • trans_agency and trans_type are only for transfers. You can search for “transfers” under the variable “org_name”

Agency == 799 for Statutory transfers Object == 1993 is for Interfund cash transfers

• NOTE: trans_agency AND trans_type: ONLY FOR TRANSFERS. o Search for TRANSFERS under the variable “org_name”. o For these TRANSFERS, variables object, category, sequence, type, are stored as “.”. o For those other than TRANSFERS, variables trans_agency AND trans_type, are stored as “.”. o For all rows with trans_agency and trans_type stored as “.”  Fill in the object variable as the first 4 numbers of obj_seq_type.  Fill In category as the first number of obj_seq_type.  Fill in sequence as the 5th and 6th digits of obj_seq_type.  Fill in type as the last two numbers of obj_seq_type. o For all rows in which the 4 variables are “.” i.e. for TRANSFERS,  Fill in trans_agency as first 3 digits of obj_seq_type.  Fill in trans_type as rest 6 of the letters in obj_seq_type.

exp_temp <- exp_temp %>% 
  mutate(transfer = ifelse(org_name == "TRANSFERS", 1, 0),
         trans_agency = ifelse(),
         trans_type = ifelse())
setwd("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/revenue")

# does all of stata code lines 1-514 of combining yearly data

allrevfiles22 = list.files(path = "C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/revenue",  pattern = ".dta") %>%  lapply(read_dta) %>% bind_rows
#Fy21: 62295 observations, 13 variables
#FY22: 65094 obs, 13 vars

write_csv(allrevfiles22, "allrevfiles22.csv")

Reads in dta file and leaves fund as a character. No longer have to worry about preserving leading zeros in categories like the fund numbers. State code used to force fund, source, and from_fund to be 4 digits long and preserve leading zeros and fund was 3 digits long with leading zeros.

setwd("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/expenditures")

allexpfiles22 = list.files(path = "C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/expenditures",  pattern = ".dta") %>%  lapply(read_dta) %>% bind_rows

#fy21 213372 observations, 20 variables
# fy22 225587 obs, 21 vars.

write_csv(allexpfiles22, "allexpfiles22.csv")

Code below reads in the csv files created in chunks above (allrevfiles.csv and allrexpfiles.csv). These files contain all years of data combined into one file BEFORE any recoding is done. Do not use this file for summing categories because it is just an in between step before recoding revenue and expenditure categories.

# combined in past chunks called create-rev-csv and create-exp-csv

allrevfiles <- read_csv("allrevfiles22.csv") #combined but not recoded
allexpfiles <- read_csv("allexpfiles22.csv") #combined but not recoded

Normally, when your receive the new fiscal year files from the Comptrollers office, you will need to change the variable names so that they are consistent with past years. This is an example of reading in the new file and changing the variable names.

For FY 2022 and after, .dta files can be avoided entirely and .csv files and R code will be used.All files before this year had been saved and passed on as .dta files for Stata code before the transition to R in Fall 2022

Example code below: Read in excel file and rename columns so that it plays well with the other years’ files.

read_xlsx("Fis_Fut_Rev_2022.xlsx") %>% 
  rename(fy = 'FISCAL YEAR',
         fund = 'FUND #',
         fund_name = 'FUND NAME',
         agency = 'AGENCY #',
         agency_name = 'AGENCY NAME',
         source = 'REVENUE SOURCE #',
         source_name = 'REV SRC NAME',
         receipts = 'REVENUE YTD AMOUNT'
  ) %>%
  # do these come from funds_ab_whatever file?
  mutate(fund_cat = FIND_COLUMN, #create fund_cat column
         fund_cat_name = FIND_NAME) # create fund_cat_name column

read_xlsx("Fis_Fut_Exp_2022.xlsx") %>% 
  rename(fy = 'FISCAL YEAR',
         fund = 'FUND #',
         fund_name = 'FUND NAME',
         agency = 'AGENCY #',
         agency_name = 'AGENCY NAME',
         appr_org = 'DIVISION',
         org_name = 'DIVISION NAME',
         obj_seq_type = 'APPROPRIATION #',
         wh_approp_name = 'APPROPRIATION NAME',
         exp_net_xfer = 'NET OF TRANS AMOUNT',
         expenditure = 'EXPENDED THRI 7/26/22'
  ) %>%
  # do these come from funds_ab_whatever file?
  mutate(data_source = "exp IOC Aug 2022",
         object = ,
         seq = ,
         type = ,
         fund_cat = FIND_COLUMN, #create fund_cat column
         fund_cat_name = FIND_NAME) # create fund_cat_name column

Identify new and reused funds for newest fiscal year. Recode funds to take into account different fund numbers/names over the years. Update fund_ab_in_2021.xlsx with any changes from previous fiscal year.

Clarify and add steps for identifying new and reused funds.

For funds that were reused once, a 9 replaces the 0 as the first digit. If reused twice, then the first two values are 10.
- Ex. 0350 –> 9350 because its use changed.
- Ex. 0367 becomes 10367 because its use has changed twice now. There was fund 0367 originally, then its use changed and it was recoded as 9367, and now it changed again so it is a 10367.

# if first character is a 0, replace with a 9

rev_1998_2022 <- allrevfiles %>%
      mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710", 
"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%
  mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
  
  mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund) ) %>%
  
   #2022 changes
  mutate(fund = ifelse(fy < 2022 & fund %in% c("0110","0165","0351", "0392", "0393", "0422", "0544", "0628", "0634",  "0656", "0672", "0683", "0723", "0742", "0743"), str_replace(fund, "0","9"), as.character(fund))) %>%  # replaces first 0 it finds with a 9
  mutate(fund = ifelse(fy < 2022 & fund == "0367", "10367", as.character(fund)) # fund reused for 3rd time
)
# if first character is a 0, replace with a 9

exp_1998_2022 <- allexpfiles %>%
      mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710", 
"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%
  mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
  
  mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund))  %>%
  
  #2022 changes
  mutate(fund = ifelse(fy < 2022 & fund %in% c("0110","0165","0351", "0392", "0393", "0422", "0544", "0628", "0634",  "0656", "0672", "0683","0723", "0742", "0743"), str_replace(fund, "0","9"), as.character(fund))) %>%  # replaces first 0 it finds with a 9
  mutate(fund = ifelse(fy < 2022 & fund == "0367", "10367", as.character(fund)) # fund reused for 3rd time 
  )

Note: exp:1998_2022 and the funds_ab_in_2021 dataframes have a fund_cat_name variable (AND THEY DONT MATCH 100%) which ends up creating a .x and .y version of the variable when they are joined together. Inspect this more later. It is not a huge concern because the fund number is what matters more.

funds_ab_in_2022 = readxl::read_excel("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/Replication-Files/funds_ab_in_2022.xlsx")


exp_temp <- exp_1998_2022 %>% 
  arrange(fund, fy) %>%
  filter(expenditure != 0) %>% # keeps everything that is not zero
# join  funds_ab_in_2021  to exp_temp
 left_join(funds_ab_in_2022, by = "fund")  # matches most recent fund number and name
  • the initial combined years of data are saved as dataframes named exp_1998_2022 and rev_1998_2022. These are then saved as exp_temp and rev_temp while recoding variables. This is BEFORE category groups are created and cleaned below. Only a temporary file, do not use for analysis.
# remove from computer memory to free up space (in case your computer needs it)
rm(allexpfiles)
rm(allrevfiles)

Modify Expenditure File

  1. Medicaid is made up of General Revenue Fund and special funds added together. Not that there is a state and federal portion to the total Fiscal Futures Medicaid expenditures. Our estimate of General Revenue Fund expenditures is added to the warrants issued from the special-purpose Medicaid funds to arrive at the total Fiscal Futures estimate for Medicaid expenditures. The rationale for this method is described below.

  2. Elementary and Secondary Education. Total expenditures of the State Board of Education (less retirement contributions)

  3. Human Services - (Medicaid expenditures & State Employee Health Care)

  4. State Employee Health Care. Sum of expenditures for “health care coverage as elected by members per state employees group insurance act.” The payments are made from the Health Insurance Reserve Fund. We subtract the share that came from employee contributions.

Tax refunds

Aggregate expenditures: Save tax refunds as negative revenue. Code refunds to match the rev_type codes (02=income taxes, 03 = corporate income taxes, 06=sales tax, 09=motor fuel tax, 24=insurance taxes and fees, 35 = all other tax refunds)

## negative revenue becomes tax refunds

tax_refund_long <- exp_temp %>% 
 # fund != "0401" # removes State Trust Funds
  filter(fund != "0401" & (object=="9910"|object=="9921"|object=="9923"|object=="9925")) %>%
  # keeps these objects which represent revenue, insurance, treasurer,and financial and professional reg tax refunds
  mutate(refund = case_when(
    fund=="0278" & sequence == "00" ~ "02",   # for income tax refund
    fund=="0278" & sequence == "01" ~ "03", # tax administration and enforcement and tax operations become corporate income tax refund
     fund == "0278" & sequence == "02" ~ "02",
    object=="9921" ~ "21", # inheritance tax and estate tax refund appropriation
    object=="9923" ~ "09", # motor fuel tax refunds
    obj_seq_type == "99250055" ~ "06", # sales tax refund
    fund=="0378" & object=="9925" ~ "24", # insurance privilege tax refund
    fund=="0001" & object=="9925" ~ "35", #all other taxes
      T ~ "CHECK")) # if none of the items above apply to the observations, then code them as 00 

    
exp_temp <- left_join(exp_temp, tax_refund_long) %>%
  mutate(refund = ifelse(is.na(refund),"not refund", as.character(refund)))

tax_refund <- tax_refund_long %>% 
  group_by(refund, fy)%>%
  summarize(refund_amount = sum(expenditure, na.rm = TRUE)/1000000) %>%
  pivot_wider(names_from = refund, values_from = refund_amount, names_prefix = "ref_") %>%
  mutate_all(~replace_na(.,0)) %>%
  arrange(fy)

exp_temp <- exp_temp %>% filter(refund == "not refund")

# remove the items we recoded in tax_refund_long
#exp_temp <- anti_join(exp_temp, tax_refund_long) # should be 156 fewer observations after antijoin

tax_refund will ultimately be removed from expenditure totals and instead subtracted from revenue totals (since they were tax refunds).

# early agencies replaced by successors
# recodes old agency numbers to consistent agency number
exp_temp <- exp_temp %>% 
  mutate(agency = case_when(
    (agency=="438"| agency=="475" |agency == "505") ~ "440",
    # financial institution &  professional regulation &
     # banks and real estate  --> coded as  financial and professional reg
    agency == "473" ~ "588", # nuclear safety moved into IEMA
    (agency =="531" | agency =="577") ~ "532", # coded as EPA
    (agency =="556" | agency == "538") ~ "406", # coded as agriculture
    agency == "560" ~ "592", # IL finance authority (fire trucks and agriculture stuff)to state fire marshal
    agency == "570" & fund == "0011" ~ "494",   # city of Chicago road fund to transportation
    TRUE ~ (as.character(agency)))) 

Pensions

State payments to the following pension systems:

• Teachers Retirement System (TRS)
• State Employee Retirement System (SERS)
• State University Retirement System (SURS)
• Judges Retirement System (JRS)
• General Assembly Retirement System (GARS)

“Operating costs of administering the pensions are not included in this category. Fiscal Futures only includes the state’s payments into the pension funds as”pension expenditures.” Note also that these payments are subtracted from reported agency spending in calculating other categories.”

  • Then why are we including objects 1160-1165

obj_seq_type== “11600000” should NOT be included in pensions, correct?

  • check fund 0183 from 1999 to 2005, org_name = Serve America, fund_name is post traumatic stress awareness
    • added “& fund !=0183” to teacher retirement pensions code below

why are local health insurance reserves included as pensions?

  • object == 1161 & type == “00”
  • county option motor fuel tax – why is this under pensions? What should it be under?

& fund != “0183” & appr_org != “55”

  • object = 4431 catches most pension
#special accounting of pension obligation bond (POB)-funded contributions to JRS, SERS, GARS, TRS 

exp_temp <-  exp_temp %>% 
  arrange(fund) %>%
  mutate(pension = case_when( 
  # objects were weird for 2010 and 2011
  (object=="4431" & fund=="0473" & (fy==2010 | fy==2011)) ~ 3, # teachers retirement system, 
  (object=="1298" & (fy==2010 | fy==2011) & (fund=="0477" | fund=="0479" | fund=="0481")) ~ 3, #judges retirement
 
  (object=="4431" | (object>"1159" & object<"1166")  ) ~ 1, # 4431 = easy to find pension items
   # objects 1159 to 1166 are all considered Retirement by Comptroller 
  # object == 1167 also appears to be Other Retirement but isn't used yet
 
 fund == "0825" ~ 4, # pension obligation acceleration bond
                                        TRUE ~ 0))


table(exp_temp$pension)
## 
##      0      1      3      4 
## 158980   8963      8     11
# create file with all pension items to find any mistakes
exp_temp %>% 
  filter(pension > 0) %>%
  write_csv("all_pensions.csv")

exp_temp %>% 
  filter(pension > 0)
exp_temp <- exp_temp %>% 
 # mutate(object = ifelse((pension == 3 & in_ff == "0"), "4431", object)) %>% # why this step?
  mutate(pension =  ifelse(pension ==1 & in_ff == "0", 2, pension)) %>% # coded as 2 if it was supposed to be excluded. Allows or checking work in between steps.
  mutate(in_ff = ifelse((pension ==2 | pension ==3 | pension == 4), "1", in_ff))

table(exp_temp$pension) 
## 
##      0      1      2      3      4 
## 158980   8812    151      8     11

Pension = 2 represents retirement pension payments that were excluded from the fiscal futures analysis by default ( in_ff was 0 because they were categorized as fund category I. State Trust Funds) but should be included and added to the revenue side under “Other Revenues” in later steps.

Summarizes the total expenditures for each pension code for each year.

#creates long version without any aggregation 
pension_2_long <- exp_temp %>%
  filter(pension == 2 ) %>%
  rename(year = fy) 


exp_temp <- anti_join(exp_temp, pension_2_long)  # 148 observations removed with antijoin


pension2_fy22<- pension_2_long %>% 
  group_by(year) %>% 
  summarize(pension_amt = sum(expenditure)/1000000)

# all other pensions (1,3,4) codes get agency code 901 for State Pension Contributions
exp_temp <- exp_temp %>% 
  mutate(agency = ifelse(pension>0, "901", as.character(agency)),
         agency_name = ifelse(agency == "901", "State Pension Contributions", as.character(agency_name)))

pension2_fy22 # used in final tables
pension_2_long %>% filter(fund == "0138" | fund == "0183" | fund == "0190" | fund == "0193") # should these be included?

Should the items above be considered pension expenditures? Currently they are coded as pension == 2 and added to revenues in later steps.

Drop Interfund transfers

  • object == 1993 is for interfund cash transfers
  • agency == 799 is for statutory transfers
  • object == 1298 is for purchase of investments and is not spending EXCEPT for pensions in 2010 and 2011 (and were recoded already to object == “4431”). Over 168,000 observations remain.
transfers_drop <- exp_temp %>% filter(
  agency == "799" | # statutory transfers
           object == "1993" |  # interfund cash transfers
           object == "1298") # purchase of investments

exp_temp <- anti_join(exp_temp, transfers_drop) # 13650 obs dropped with antijoin

State employee healthcare costs

Fund = 0457 is “Group insurance premium”, in_ff = 1 Fund = 0193 is “Local govt health insurance reserve”, in=ff = 0 fund = 0477 is “Community College Health Insurance”, in=ff = 0.
- had large amount in early years Fund = 0907 = health insurance reserve, in_ff = 1 Fund = 9939 is “group self-insurers’ insolv”, in_ff = 1 Fund = 0940 is Self-Insurers security, in_ff = 0 Fund = 0739 is Group Workers Comp Pool Insol, in_ff = 1

Observations that remain in the eehc2_amt are interagency receipts.

Employer contributions for group insurance (contributions count as a revenue source).

Creates the employee healthcare “costs” amount to be added to the revenue side that would have been excluded due to being I. State Trust Funds and therefore in_ff=0 : Employer contributions are a revenue source and should be subtracted from state employee healthcare costs (premiums - healthcare payouts = net costs for state).

Added line of code Sept. 21 2022: eehc = ifelse(obj_seq_type == “19000000” & wh_approp_name == “GROUP INSURANCE”, 1, eehc)) %>%

  • Should move group insurance that was categorized from central management to employee health costs.

  • eehc = additional group insurance items under obj_seq_type == “11201000” in 2019 but smaller dollar amount

  • object 1900 is for Lump Sums and Other Purposes

# eehc = 0 means it is NOT a state healthcare cost
# eehc = 1 means it is a state employee healthcare cost

group_ins <- exp_temp %>% 
  mutate(eehc = ifelse(
    # group insurance contributions for 1998-2005 and 2013-present
   fund == "0001" & (object == "1180" | object =="1900") & agency == "416" & appr_org=="20", 0, 1) )%>% 
  mutate(eehc = ifelse(
    # group insurance contributions for 2006-2012
    fund == "0001" & object == "1180" & agency == "478" & appr_org=="80", 0, eehc) )%>%
  filter(eehc == 0) %>% 
    group_by(fy) %>% 
    summarize(dropped_group_premiums = sum(expenditure))


healthcare_costs <- exp_temp %>% 
  mutate(eehc = ifelse(
    # group insurance contributions for 1998-2005 and 2013-present
   fund == "0001" & (object == "1180" | object =="1900") & agency == "416" & appr_org=="20", 0, 1) )%>% 
  mutate(eehc = ifelse(
    # group insurance contributions for 2006-2012
    fund == "0001" & object == "1180" & agency == "478" & appr_org=="80", 0, eehc) )%>%
  mutate(expenditure = ifelse(eehc==0, 0, expenditure)) %>%    
  group_by(fy) %>% 
    summarize(cost_of_provision = sum(expenditure))

exp_temp <- exp_temp %>% 
  mutate(eehc = ifelse(
    # group insurance contributions for 1998-2005 and 2013-present
   fund == "0001" & (object == "1180" | object =="1900") & agency == "416" & appr_org=="20", 0, 1) )%>% 
  mutate(eehc = ifelse(
    # group insurance contributions for 2006-2012
    fund == "0001" & object == "1180" & agency == "478" & appr_org=="80", 0, eehc) )%>%
  mutate(expenditure = ifelse(eehc==0, 0, expenditure)) 

exp_temp_check <- exp_temp %>% 
  mutate(agency = case_when(   # turns specific items into State Employee Healthcare (agency=904)
      fund=="0907" & (agency=="416" & appr_org=="20") ~ "904",   # central management Bureau of benefits using health insurance reserve 
      fund=="0907" & (agency=="478" & appr_org=="80") ~ "904",   # agency = 478: healthcare & family services using health insurance reserve - stopped using this in 2012
    #  fund=="0001" & appr_org=="20" & object=="1900" & agency=="416" & (fy>2002 & fy<2006) ~ "904",
      
    #  fund=="0001" & appr_org=="20" & object=="1900" & agency=="416" & (fy>2020) ~ "904",
      
    #  obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & (fy>2020) ~ "904",

      TRUE ~ as.character(agency))) %>%
  mutate(agency_name = ifelse(agency == "904", "STATE EMPLOYEE HEALTHCARE", as.character(agency_name)),
         group = ifelse(agency == "904", "904", as.character(agency))) %>% # creates group variable
filter(group == "904") %>% group_by(fy) %>% summarise(healthcare_cost = sum(expenditure))
exp_temp_check
# Looks good, Sept 28 AWM

exp_temp <- exp_temp %>% 
  mutate(agency = case_when(   # turns specific items into State Employee Healthcare (agency=904)
      fund=="0907" & (agency=="416" & appr_org=="20") ~ "904",   # central management Bureau of benefits using health insurance reserve 
      fund=="0907" & (agency=="478" & appr_org=="80") ~ "904",   # agency = 478: healthcare & family services using health insurance reserve - stopped using this in 2012
      TRUE ~ as.character(agency))) %>%
  mutate(agency_name = ifelse(agency == "904", "STATE EMPLOYEE HEALTHCARE", as.character(agency_name)),
         group = ifelse(agency == "904", "904", as.character(agency)))  # creates group variable

#Default group = agency number

# this code for dealing with group insurance means that eehc no longer needs to be created or added into revenue at a later stage.
# identifies eehc values that would have been excluded due to in_ff == 0 before recoding

exp_temp <- exp_temp %>% 
  mutate(eehc = ifelse(
   ( (object == "1180" | object =="1900") & agency == "416") |
      (agency == "478" & object == "1180"  ), 1, 0)) %>%
  #mutate(eehc = ifelse(obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & fy > 2020, 1, eehc)  ) %>%
  #  mutate(expenditure = ifelse(obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & fy > 2020, 0, expenditure)  ) %>% # Francis's method 9.22.22
  mutate(eehc = ifelse((eehc == 1 & in_ff =="0"), 2, eehc)) %>% # if eehc == 1 AND in_ff was zero, then recode eehc to 2, otherwise leave eehc as it was.  Mostly helps flag things that would have been excluded due to default in_ff coding 
  mutate(in_ff = ifelse(eehc == 2, "1", in_ff) ) 




|(agency == "478" & object == "1180"  ), 1, 0)) %>%
  #mutate(eehc = ifelse(obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & fy > 2020, 1, eehc)  ) %>%
  #  mutate(expenditure = ifelse(obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & fy > 2020, 0, expenditure)  ) %>% # Francis's method 9.22.22
  mutate(eehc = ifelse((eehc == 1 & in_ff =="0"), 2, eehc)) %>% # if eehc == 1 AND in_ff was zero, then recode eehc to 2, otherwise leave eehc as it was.  Mostly helps flag things that would have been excluded due to default in_ff coding 
  mutate(in_ff = ifelse(eehc == 2, "1", in_ff) )

eehc_2_long <- exp_temp %>%
# recodes in_ff to 1 if eehc was coded to 2 to make sure they are included in fiscal futures.
  filter(eehc == 2) # keeps only eehc == 2, items that would have been excluded based on in_ff original coding

# 146 observations
eehc_2_long

# summarizes by year totals for state employee healthcare costs == 2
eehc2_amt <- eehc_2_long %>% group_by(fy) %>%
  summarize(eehc = sum(expenditure, na.rm = TRUE)/1000000)

# examine all eehc items in csv file to check mistakes
exp_temp %>%  
  mutate(eehc = ifelse(object == "1180", 1, 0)) %>%
  mutate(eehc = ifelse(obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & fy > 2020, 1, eehc)  ) %>%
  mutate(eehc = ifelse((eehc == 1 & in_ff =="0"), 2, eehc)) %>% 
  filter(eehc >0) %>% 
  write_csv("all_eehc.csv")

exp_temp <- anti_join(exp_temp, eehc_2_long, by = c("fy", "fund", "fund_name", "agency", "agency_name", "appr_org", "org_name", "obj_seq_type", "appn_net_xfer", "expenditure", "data_source", "object", "category", "sequence", "type", "trans_agency", "trans_type", "wh_approp_name"))
# should remove the 146 eehc==2 observations from exp_temp

# 149451 - 146 = 149305 obs (expected value after antijoin)

State Employee Health Care = Sum of expenditures for “health care coverage as elected by members per state employees group insurance act.” The payments are made from the Health Insurance Reserve Fund. We subtract the share that came from employee contributions. Employee contributions are not considered a revenue source or an expenditure in our analysis.

exp_temp <- exp_temp %>% 
  mutate(agency = case_when(   # turns specific items into State Employee Healthcare (agency=904)
      fund=="0907" & (agency=="416" & appr_org=="20") ~ "904",   # central management is agency 416
      fund=="0907" & (agency=="478" & appr_org=="80") ~ "904",   # agency = 478: healthcare & family services, stopped using this in 2012
      fund=="0001" & appr_org=="20" & object=="1900" & agency=="416" & (fy>2002 & fy<2006) ~ "904",
      
      fund=="0001" & appr_org=="20" & object=="1900" & agency=="416" & (fy>2020) ~ "904",
      
      obj_seq_type == "19000000" & wh_approp_name == "GROUP INSURANCE" & (fy>2020) ~ "904",

      TRUE ~ as.character(agency))) %>%
  mutate(agency_name = ifelse(agency == "904", "STATE EMPLOYEE HEALTHCARE", as.character(agency_name)),
         group = ifelse(agency == "904", "904", as.character(agency))) # creates group variable. 

#Default group = agency number

Local Transfers

Separate transfers to local from parent agencies that come from DOR(492) or Transportation (494). Treats muni revenue transfers as expenditures, not negative revenue.

The share of certain taxes levied state-wide at a common rate and then transferred to local governments. (Purely local-option taxes levied by specific local governments with the state acting as collection agent are not included.)

The five corresponding revenue items are:

• Local share of Personal Income Tax
• Local share of General Sales Tax
• Personal Property Replacement Tax on Business Income
• Personal Property Replacement Tax on Public Utilities
• Local share of Motor Fuel Tax

Add the mft mentioned in GOMB email

exp_temp <- exp_temp %>% mutate(
  agency = case_when(fund=="0515" & object=="4470" & type=="08" ~ "971", # income tax 
                     fund=="0515" & object=="4491" & type=="08" & sequence=="00" ~ "971", 
                     fund=="0802" & object=="4491" ~ "972", #pprt transfer
                     fund=="0515" & object=="4491" & type=="08" & sequence=="01" ~ "976", #gst to local
                     fund=="0627" & object=="4472"~ "976" ,
                     fund=="0648" & object=="4472" ~ "976",
                     fund=="0515" & object=="4470" & type=="00" ~ "976",
                    object=="4491" & (fund=="0188"|fund=="0189") ~ "976",
                     fund=="0187" & object=="4470" ~ "976",
                     fund=="0186" & object=="4470" ~ "976",
                    object=="4491" & (fund=="0413"|fund=="0414"|fund=="0415")  ~ "975", #mft to local
                    TRUE ~ as.character(agency)),
  agency_name = case_when(agency == "971"~ "INCOME TAX 1/10 TO LOCAL",
                          agency == "972" ~ "PPRT TRANSFER TO LOCAL",
                          agency == "976" ~ "GST TO LOCAL",
                          agency == "975" ~ "MFT TO LOCAL",
                          TRUE~as.character(agency_name)),
  group = ifelse(agency>"970" & agency < "977", as.character(agency), as.character(group)))


table(exp_temp$group) 
## 
##   101   102   103   105   107   108   109   110   112   115   120   131   140 
##   583     3   240   155    89   193   137   129   162   128    17   386     7 
##   155   156   167   201   210   275   280   285   290   295   310   330   340 
##    75   117   118  1345    15   399     1   234   470  1185   213   205   819 
##   350   360   370   402   406   416   418   420   422   425   426   427   440 
##  4098  1740   803  1829  4660  3924  2420 10975  9668  1038  7614   779  3705 
##   442   444   445   446   448   452   458   466   478   482   492   493   494 
##   596 11357    23  1119    22   610   305   587  3060  5524  4129  1924  9559 
##   497   503   506   507   509   510   511   517   520   524   525   526   527 
##  2519   421    17   332    33    26  8954   128     5  1126    28   174    40 
##   528   529   532   533   534   537   540   541   542   546   548   554   555 
##  1838    18  5746     2     5   192    64  1305   174   873   264    26    25 
##   557   558   559   562   563   564   565   567   568   569   571   574   575 
##   208   280   245    19   699    17   198   176     2   450    65    80    85 
##   576   578   579   580   583   585   586   587   588   589   590   591   592 
##     1   233   438   327    21    43  5290   683  2681   593   166   188  1070 
##   593   598   601   608   612   616   620   628   636   644   664   676   684 
##   147    10   720   177   131   141    99   147   115   182   271   462   895 
##   691   692   693   695   901   904   971   972   975   976 
##   934   786     8   197  8825    47    25    25    75  1174
exp_temp <- exp_temp %>% filter(in_ff != 0) # drops in_ff = 0 funds AFTER dealing with net-revenue above

# 149305 obs to 145185 obs after filtering !=0

Debt Service

Debt Service expenditures include principal and interest payment on both short-term and long-term debt. We do not include escrow payments.

but we did include an escrow payment. We also were including principle payments as costs but not revenues which is incorrect. No longer including princple payments as a cost, only interest on borrowing is a cost

  • wh_approp_name == “ESCROW PYMNT RFNDNG BOND ISSUE”
  • obj_seq_type == “88410008”

Escrow payment == 88410008 Bond principle payments: obj_seq_type == 88110008 Short term borrowing principle: obj_seq_type == 88110108 Interest: obj_seq_type == 88130000 & 88130008 Interest for short-term borrowing: 88130108 Tollway Principal AND Interest: obj_seq_type == 88000055
- Note: Tollway Principle Payments and Interests are included as one item.

8813____ is for interest INCLUDE AS COST 8811____ is for principle EXCLUDE from analysis 8841____ is for escrow payments EXCLUDE from analysis

8800____ is for tollway

# exp_temp <- exp_temp %>% 
#   # objects in the 8000s refer to debt service
#   # fund 0455 is for the IL Tollway
#   mutate(agency = if_else(object>"7999" & object<"9000" & fund!="0455", "903", as.character(agency)),
#          agency_name = if_else(agency == "903", "DEBT SERVICE", as.character(agency_name)),
#          group = if_else(agency == "903", "903", as.character(group)))


princ_pmt <- exp_temp %>% 
  filter(object == "8811" | object == "8841") # principal and escrow

exp_temp <- anti_join(exp_temp, princ_pmt) %>%
  # mutate(group = if_else(object == "8813", "903", as.character(group)),
  #        agency = if_else(object == "8813", "903", as.character(agency_name)),
  #        agency_name = if_else(agency == "903", "DEBT SERVICE", as.character(agency_name)))

  mutate(agency = if_else(object== "8813", "903", as.character(agency)),
        agency_name = if_else(agency == "903", "DEBT SERVICE", as.character(agency_name)),
        group = if_else(agency == "903", "903", as.character(group)))

Add Fiscal Future group codes

exp_temp<- exp_temp %>%
  #mutate(agency = as.numeric(agency) ) %>%
  # arrange(agency)%>%
  mutate(
    group = case_when(
      agency>"100"& agency<"200" ~ "910", # legislative
      
      agency == "528"  | (agency>"200" & agency<"300") ~ "920", # judicial
      
      (agency>"309" & agency<"400") ~ "930",    # elected officers
      
      agency == "586" ~ "959", # create new K-12 group

      agency=="402" | agency=="418" | agency=="478" | agency=="444" | agency=="482" ~ as.character(agency), # aging, CFS,HFS, human services, public health
      T ~ as.character(group)),
    
      #chip = ifelse(fund == "0001" & agency == "478" & appr_org == "65" &object=="4900" & (sequence == "20" | sequence == "54" | sequence == "61" | sequence == "62" | sequence == "65"),1 ,0) 
    ) %>%      

  
  mutate(group = case_when(
    agency=="478" & (appr_org=="01" | appr_org == "65" | appr_org=="88") & (object=="4900" | object=="4400") ~ "945", # separates CHIP from health and human services and saves it as Medicaid
    
    agency == "586" & fund == "0355" ~ "478",  # 586 (Board of Edu) has special education which is part of medicaid
    #agency == "586" & appr_org == "18" ~ "945", # Spec. Edu Medicaid Matching
    
    agency=="425" | agency=="466" | agency=="546" | agency=="569" | agency=="578" | agency=="583" | agency=="591" | agency=="592" | agency=="493" | agency=="588" ~ "941", # public safety & Corrections
    
    agency=="420" | agency=="494" |  agency=="406" | agency=="557" ~ as.character(agency), # econ devt & infra
    
    agency=="511" | agency=="554" | agency=="574" | agency=="598" ~ "946",  # Capital improvement
    
    agency=="422" | agency=="532" ~ as.character(agency), # environment & nat. resources
    
    agency=="440" | agency=="446" | agency=="524" | agency=="563"  ~ "944", # business regulation
    
    agency=="492" ~ "492", # revenue
    agency == "416" ~ "416", # central management services
    
    agency=="448" & fy > 2016 ~ "416", #add DoIT to central management 
    
    T ~ as.character(group))) %>%
  
  
  mutate(group = case_when(
    agency=="684" | agency=="691"  ~ as.character(agency),
    
    agency=="692" | agency=="695" | (agency>"599" & agency<"677") ~ "960", # higher education
    
    agency=="427"  ~ as.character(agency), # employment security
    
    agency=="507"|  agency=="442" | agency=="445" | agency=="452" |agency=="458" | agency=="497" ~ "948", # other departments
    
    # other boards & Commissions
    agency=="503" | agency=="509" | agency=="510" | agency=="565" |agency=="517" | agency=="525" | agency=="526" | agency=="529" | agency=="537" | agency=="541" | agency=="542" | agency=="548" |  agency=="555" | agency=="558" | agency=="559" | agency=="562" | agency=="564" | agency=="568" | agency=="579" | agency=="580" | agency=="587" | agency=="590" | agency=="527" | agency=="585" | agency=="567" | agency=="571" | agency=="575" | agency=="540" | agency=="576" | agency=="564" | agency=="534" | agency=="520" | agency=="506" | agency == "533" ~ "949", 
    
    # non-pension expenditures of retirement funds moved to "Other Departments"
    agency=="131" | agency=="275" | agency=="589" |agency=="593"|agency=="594"|agency=="693" ~ "948",
    
    T ~ as.character(group))) %>%
  
  mutate(group_name = 
           case_when(
             group == "900" ~ "NOT IN FRAME",
             group == "901" ~ "STATE PENSION CONTRIBUTION",
             group == "903" ~ "DEBT SERVICE",
             group == "910" ~ "LEGISLATIVE"  ,
             group == "920" ~ "JUDICIAL" ,
             group == "930" ~ "ELECTED OFFICERS" , 
             group == "940" ~ "OTHER HEALTH-RELATED", 
             group == "941" ~ "PUBLIC SAFETY" ,
             group == "942" ~ "ECON DEVT & INFRASTRUCTURE" ,
             group == "943" ~ "CENTRAL SERVICES",
             group == "944" ~ "BUS & PROFESSION REGULATION" ,
             group == "945" ~ "MEDICAID" ,
             group == "946" ~ "CAPITAL IMPROVEMENT" , 
             group == "948" ~ "OTHER DEPARTMENTS" ,
             group == "949" ~ "OTHER BOARDS & COMMISSIONS" ,
             group == "959" ~ "K-12 EDUCATION" ,
             group == "960" ~ "UNIVERSITY EDUCATION" ,
             group == agency ~ as.character(group),
             TRUE ~ "Check name"),
         year = fy)

table(exp_temp$group)
## 
##   402   406   416   418   420   422   426   427   444   478   482   492   494 
##  1829  4602  3595  2420 10964  9663  7612   779 11325  1661  5523  3318  9536 
##   532   557   684   691   901   903   904   910   920   930   941   944   945 
##  5703   208   895   910  8825    56    47  2153  5060  7326  9035  6521   866 
##   946   948   949   959   960   971   972   975   976 
##  9009  4513  5610  5235  3080    25    25    75  1174
# number of observations within each group category

table(exp_temp$group_name)
## 
##                         402                         406 
##                        1829                        4602 
##                         416                         418 
##                        3573                        2420 
##                         420                         422 
##                       10964                        9663 
##                         426                         427 
##                        7612                         779 
##                         444                         478 
##                       11325                        1657 
##                         482                         492 
##                        5523                        3318 
##                         494                         532 
##                        9536                        5703 
##                         557                         684 
##                         208                         895 
##                         691                         904 
##                         910                          47 
##                         971                         972 
##                          25                          25 
##                         975                         976 
##                          75                        1174 
## BUS & PROFESSION REGULATION         CAPITAL IMPROVEMENT 
##                        6521                        9009 
##                  Check name                DEBT SERVICE 
##                          26                          56 
##            ELECTED OFFICERS                    JUDICIAL 
##                        7326                        5060 
##              K-12 EDUCATION                 LEGISLATIVE 
##                        5235                        2153 
##                    MEDICAID  OTHER BOARDS & COMMISSIONS 
##                         866                        5610 
##           OTHER DEPARTMENTS               PUBLIC SAFETY 
##                        4513                        9035 
##  STATE PENSION CONTRIBUTION        UNIVERSITY EDUCATION 
##                        8825                        3080
transfers_long <- exp_temp %>% 
  filter(group == "971" |group == "972" | group == "975" | group == "976")

transfers <- transfers_long %>%
  group_by(year, group ) %>%
  summarize(sum_expenditure = sum(expenditure)/1000000) %>%
  pivot_wider(names_from = "group", values_from = "sum_expenditure", names_prefix = "exp_" )

exp_temp <- anti_join(exp_temp, transfers_long) 

# write_csv(exp_temp, "all_expenditures_recoded.csv")

All expenditures recoded but not aggregated: Allows for inspection of individual expenditures within larger categories. This stage of the data is extremely useful for investigating almost all questions we have about the data.

Note that these are the raw figures BEFORE we take the additional steps:

  • Subtract optional premiums from State Employee Healthcare expenditures
  • Subtract refunds from revenues by revenue type.
  • Add employee health costs and certain pension contributions to All Other Revenues
exp_temp %>%
  group_by(year, group) %>%
  summarize(sum_expenditure = sum(expenditure)/1000000) %>%
  arrange(year) %>%
  pivot_wider(names_from = "group", values_from = "sum_expenditure")
aggregate_exp_labeled <- exp_temp %>%
  group_by(year, group_name) %>%
  summarize(sum_expenditure = sum(expenditure)/1000000) %>%
  arrange(year) %>%
  pivot_wider(names_from = "group_name", values_from = "sum_expenditure")

aggregate_exp_labeled

Modify Revenue data

Revenue Categories not included in Fiscal Futures:
- 32. Garnishment-Levies. (State is fiduciary, not beneficiary.)
- 45. Student Fees-Universities. (Excluded from state-level budget.)
- 51. Retirement Contributions (of individuals and non-state entities).
- 66. Proceeds, Investment Maturities. (Not sustainable flow.)
- 72. Bond Issue Proceeds. (Not sustainable flow.)
- 75. Inter-Agency Receipts. (Exceptfrom Funds excluded from Fiscal Futures)
- 79. Cook County Intergovernmental Transfers. (State is not beneficiary.)
- 98. Prior Year Refunds.

All Other Sources

Expanded to include the following smaller sources:
- 30. Horse Racing Taxes & Fees.
- 60. Other Grants and Contracts.
- 63. Investment Income. - 75. Inter-Agency Receipts. (Only from Funds excluded from Fiscal Futures)

For aggregating revenue, use the rev_1998_2022 dataframe, join the funds_ab_in_2021 file to it, and then join the ioc_source_type file to the dataset.

You need to update the funds_ab_in and ioc_source_type file every year!

include how to do that later

# fund info to revenue for all years
rev_temp <- inner_join(rev_1998_2022, funds_ab_in_2022, by = "fund") %>% arrange(source)

# need to update the ioc_source_type file every year! 
ioc_source_type <- readxl::read_xlsx("C:/Users/aleaw/OneDrive/Desktop/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/Replication-Files/ioc_source_updated22_AWM.xlsx")

rev_temp <- left_join(rev_temp, ioc_source_type, by = "source")
# automatically used source, source name does not match for the join to work using source_name

rev_temp <- rev_temp %>% 
  mutate(
    rev_type = ifelse(rev_type=="57" & agency=="478" & (source=="0618"|source=="2364"|source=="0660"|source=="1552"| source=="2306"| source=="2076"|source=="0676"|source=="0692"), "58", rev_type),
    rev_type_name = ifelse(rev_type=="58", "FEDERAL TRANSPORTATION", rev_type_name),
    rev_type = ifelse(rev_type=="57" & agency=="494", "59", rev_type),
    rev_type_name = ifelse(rev_type=="59", "FEDERAL TRANSPORTATION", rev_type_name),
    rev_type_name = ifelse(rev_type=="57", "FEDERAL OTHER", rev_type_name),
    rev_type = ifelse(rev_type=="6", "06", rev_type),
       rev_type = ifelse(rev_type=="9", "09", rev_type)) 


rev_temp %>% 
  group_by(fy, rev_type_name) %>% 
  summarise(receipts = sum(receipts, na.rm = TRUE)/1000000)

Optional Medicaid Payments

0120 = ins prem-option life 0120 = ins prem-opttional life/univ

0345 = 0347 = optional health - HMO 0348 = optional health - dental 0349 = optional health - univ/local SI 0350 = optional health - univ/local 0351 = optional health - retirement 0352 = optional health - retirement SI 0353 = optional health - retire/dental 0354 = optional health - retiremet hmo

2199-2209 = various HMOs, dental, health plans from Health Insurance Reserve (fund)

opt_premiums_CHECK <- rev_temp %>% 
  filter((fund=="0907" | fund == "0457") & (source=="0120" | source=="0121" | (source>"0345" & source<"0357") | (source>"2199" & source<"2209")                                            ) 
         )


rev_temp <-  rev_temp %>% mutate(med_option_recent = ifelse(
    fund=="0907" & (source=="0120"| source=="0121"| (source>"0345" & source<"0357")|(source>"2199" & source<"2209")), 1, 0))

#collect optional insurance premiums to fund 0907 for use in eehc expenditure  
rev_temp <- rev_temp %>% 
  mutate(med_option_recent = ifelse(
    fund=="0907" & (source=="0120"| source=="0121"| (source>"0345" & source<"0357")|(source>"2199" & source<"2209")), 1, 0),
    # adds more rev_types
    rev_type = case_when(
      fund =="0427" ~ "12", # pub utility tax
      fund == "0742" | fund == "0473" ~ "24", # insurance and fees
      fund == "0976" ~ "36",# receipts from rev producing
      fund == "0392" |fund == "0723" ~ "39", # licenses and fees
      fund == "0656" ~ "78", #all other rev sources
      TRUE ~ as.character(rev_type)))
#if not mentioned, then rev_type as it was
# optional insurance premiums
med_option_recent <- rev_temp %>%
  group_by(fy, med_option_recent) %>%
  summarize(med_option_amt_recent = sum(receipts)/1000000) %>%
  filter(med_option_recent == 1) %>%
  rename(year = fy) %>% 
  select(-med_option_recent)

med_option_long <- rev_temp %>%  filter(med_option_recent == 1)
# 361 observations have med_option_recent == 1

med_option_long %>% 
  group_by(fy, med_option_recent) %>%
  summarize(med_option_amt_recent = sum(receipts)/1000000) %>%
  rename(year = fy) %>% 
  select(-med_option_recent)
rev_temp <- rev_temp %>% filter(med_option_recent != 1)

Still need to add med_option data to Other Revenues

rev_temp <- rev_temp %>% 
  filter(in_ff == 1) %>% 
  mutate(local = ifelse(is.na(local), 0, local)) %>%
  filter(local != 1)


in_from_out <- c("0847", "0867", "1175", "1176", "1177", "1178", "1181", "1182", "1582", "1592", "1745", "1982", "2174", "2264")

rev_temp <- rev_temp %>% 
  mutate(rev_type_new = ifelse(source %in% in_from_out, "76", rev_type))
# if source contains any of the codes in in_from_out, code them as 76 (all other rev).

# revenue types to drop
drop_type <- c("32", "45", "51", "66", "72", "75", "79", "98")

# drops Blank, Student Fees, Retirement contributions, proceeds/investments,
# bond issue proceeds, interagency receipts, cook IGT, Prior year refunds.


rev_temp <- rev_temp %>% filter(!rev_type_new %in% drop_type)
# keep observations that do not have a revenue type mentioned in drop_type

table(rev_temp$rev_type_new)
## 
##    02    03    06    09    12    15    18    21    24    27    30    31    33 
##   161   124   828   127   575   258    45  1420   450    76   659   124   130 
##    35    36    39    42    48    54    57    58    59    60    63    76    78 
##   660  5152  9044  2755    31  1239  6451   620   226   103  5081   154 10880 
##    99 
##   964
rev_temp %>% 
  group_by(fy, rev_type_new) %>% 
  summarize(total_reciepts = sum(receipts)/1000000) %>%
  pivot_wider(names_from = rev_type_new, values_from = total_reciepts, names_prefix = "rev_") 

Transfers in and Out: in_from_out <- c(“0847”, “0867”, “1175”, “1176”, “1177”, “1178”, “1181”, “1182”, “1582”, “1592”, “1745”, “1982”, “2174”, “2264”)

# combines smallest 4  categories to to "Other"
# they were the 4 smallest in past years, are they still the 4 smallest? 

rev_temp <- rev_temp %>%  
  mutate(rev_type_new = ifelse(rev_type=="30" | rev_type=="60" | rev_type=="63" | rev_type=="76" | rev_type=="78" , "78", rev_type_new))


#table(rev_temp$rev_type_new)  # check work


rm(rev_1998_2022)
rm(exp_1998_2022)

Pivoting and Merging

  • State employer contributions (eehc from eehc2_amt) should be moved to Other revenues.

  • State pension contributions (pension_amt from pension2_fy22) should be added to Other revenues.

  • Local Government Transfers (exp_970) should be on the expenditure side.

  • Subtract employee insurance premiums from 904 (State Employee Healthcare Expenditures - Employee Premiums = Actual state healthcare costs. Subtract med_option_amt_recent in med_option_recent from exp_904 in ff_exp).

Revenues

ff_rev <- rev_temp %>% 
  group_by(rev_type_new, fy) %>% 
  summarize(sum_receipts = sum(receipts, na.rm=TRUE)/1000000 ) %>%
  pivot_wider(names_from = "rev_type_new", values_from = "sum_receipts", names_prefix = "rev_")

ff_rev<- left_join(ff_rev, tax_refund)

ff_rev <- left_join(ff_rev, pension2_fy22, by=c("fy" = "year"))

#ff_rev <- left_join(ff_rev, eehc2_amt) 
ff_rev <- mutate_all(ff_rev, ~replace_na(.,0))


ff_rev <- ff_rev %>%
  mutate(rev_02 = rev_02 - ref_02,
         rev_03 = rev_03 - ref_03,
         rev_06 = rev_06 - ref_06,
         rev_09 = rev_09 - ref_09,
         rev_21 = rev_21 - ref_21,
         rev_24 = rev_24 - ref_24,
         rev_35 = rev_35 - ref_35,

         rev_78new = rev_78 + pension_amt #+ eehc
         ) %>% 
  select(-c(ref_02:ref_35, rev_76, rev_78, rev_99, rev_NA, pension_amt
          #  , eehc
            ))

ff_rev

Since I already pivot_wider()ed the table in the previous code chunk, I now change each column’s name by using rename() to set new variable names. Ideally the final dataframe would have both the variable name and the variable label but I have not done that yet.

aggregate_rev_labels <- ff_rev %>%
  rename("INDIVIDUAL INCOME TAXES, gross of local, net of refunds" = rev_02,
         "CORPORATE INCOME TAXES, gross of PPRT, net of refunds" = rev_03,
         "SALES TAXES, gross of local share" = rev_06 ,
         "MOTOR FUEL TAX, gross of local share, net of refunds" = rev_09 ,
         "PUBLIC UTILITY TAXES, gross of PPRT" = rev_12,
         "CIGARETTE TAXES" = rev_15 ,
         "LIQUOR GALLONAGE TAXES" = rev_18,
         "INHERITANCE TAX" = rev_21,
         "INSURANCE TAXES&FEES&LICENSES, net of refunds" = rev_24 ,
         "CORP FRANCHISE TAXES & FEES" = rev_27,
      #   "HORSE RACING TAXES & FEES" = rev_30,  # in Other
         "MEDICAL PROVIDER ASSESSMENTS" = rev_31 ,
         # "GARNISHMENT-LEVIES " = rev_32 , # dropped
         "LOTTERY RECEIPTS" = rev_33 ,
         "OTHER TAXES" = rev_35,
         "RECEIPTS FROM REVENUE PRODUCNG" = rev_36, 
         "LICENSES, FEES & REGISTRATIONS" = rev_39 ,
         "MOTOR VEHICLE AND OPERATORS" = rev_42 ,
         #  "STUDENT FEES-UNIVERSITIES" = rev_45,   # dropped
         "RIVERBOAT WAGERING TAXES" = rev_48 ,
         # "RETIREMENT CONTRIBUTIONS " = rev_51, # dropped
         "GIFTS AND BEQUESTS" = rev_54, 
         "FEDERAL OTHER" = rev_57 ,
         "FEDERAL MEDICAID" = rev_58, 
         "FEDERAL TRANSPORTATION" = rev_59 ,
      #   "OTHER GRANTS AND CONTRACTS" = rev_60, #other
       #  "INVESTMENT INCOME" = rev_63, # other
         # "PROCEEDS,INVESTMENT MATURITIES" = rev_66 , #dropped
         # "BOND ISSUE PROCEEDS" = rev_72,  #dropped
         # "INTER-AGENCY RECEIPTS" = rev_75,  #dropped
     #    "TRANSFER IN FROM OUT FUNDS" = rev_76,  #other
         "ALL OTHER SOURCES" = rev_78new ,
         # "COOK COUNTY IGT" = rev_79, #dropped
         # "PRIOR YEAR REFUNDS" = rev_98 #dropped
  ) 

aggregate_rev_labels
# Still contains columns that should be dropped for the clean final aggregate table. Drop the variables I don't want in the output table in the "graphs" section.  

Expenditures

Create state employee healthcare costs that reflects the health costs minus the optional insurance premiums that came in (904_new = 904 - med_option_amt_recent).

Create exp_970 for all local government transfers (exp_971 + exp_972 + exp_975 + exp_976).

ff_exp <- exp_temp %>% 
  group_by(fy, group) %>% 
  summarize(sum_expenditures = sum(expenditure, na.rm=TRUE)/1000000 ) %>%
  pivot_wider(names_from = "group", values_from = "sum_expenditures", names_prefix = "exp_")%>%
  
  # join state employee healthcare and subtract employee premiums
  left_join(med_option_recent, by = c("fy" = "year")) %>%
  mutate(exp_904_new = exp_904 - med_option_amt_recent) %>% # state employee healthcare 
  
  # join local transfers and create exp_970
  left_join(transfers, by = c("fy" = "year")) %>%
  mutate(exp_970 = exp_971 + exp_972  + exp_975 + exp_976)

ff_exp<- ff_exp %>% select(-c(exp_904, med_option_amt_recent, exp_971:exp_976)) # drop unwanted columns
ff_exp

Clean Table Outputs

Create total revenues and total expenditures only:

  • after aggregating expenditures and revenues, pivoting wider, and left_joining the additional mini dataframes (med_option_recent, pension2_fy22, eehc2_amt), then I want to drop the columns that I no longer want and then pivot_longer(). After pivoting_longer() and creating rev_long and exp_long, expenditures and revenues are in the same format and can be combined together for the totals and gap each year.
rev_long <- pivot_longer(ff_rev, rev_02:rev_78new, names_to = c("type","Category"), values_to = "Dollars", names_sep = "_") %>% 
  rename(Year = fy) %>%
  mutate(Category_name = case_when(
    Category == "02" ~ "INDIVIDUAL INCOME TAXES, gross of local, net of refunds" ,
    Category == "03" ~ "CORPORATE INCOME TAXES, gross of PPRT, net of refunds" ,
    Category == "06" ~ "SALES TAXES, gross of local share" ,
    Category == "09" ~ "MOTOR FUEL TAX, gross of local share, net of refunds" ,
    Category == "12" ~ "PUBLIC UTILITY TAXES, gross of PPRT" ,
    Category == "15" ~ "CIGARETTE TAXES" ,
    Category == "18" ~ "LIQUOR GALLONAGE TAXES" ,
    Category == "21" ~ "INHERITANCE TAX" ,
    Category == "24" ~ "INSURANCE TAXES&FEES&LICENSES, net of refunds " ,
    Category == "27" ~ "CORP FRANCHISE TAXES & FEES" ,
    Category == "30" ~ "HORSE RACING TAXES & FEES",  # in Other
    Category == "31" ~ "MEDICAL PROVIDER ASSESSMENTS" ,
    Category == "32" ~ "GARNISHMENT-LEVIES" , # dropped
    Category == "33" ~  "LOTTERY RECEIPTS" ,
    Category == "35" ~  "OTHER TAXES" ,
    Category == "36" ~  "RECEIPTS FROM REVENUE PRODUCNG", 
    Category == "39" ~  "LICENSES, FEES & REGISTRATIONS" ,
    Category == "42" ~  "MOTOR VEHICLE AND OPERATORS" ,
    Category == "45" ~  "STUDENT FEES-UNIVERSITIES",   # dropped
    Category == "48" ~  "RIVERBOAT WAGERING TAXES" ,
    Category == "51" ~  "RETIREMENT CONTRIBUTIONS" , # dropped
    Category == "54" ~ "GIFTS AND BEQUESTS", 
    Category == "57" ~  "FEDERAL OTHER" ,
    Category == "58" ~  "FEDERAL MEDICAID", 
    Category == "59" ~  "FEDERAL TRANSPORTATION" ,
    Category == "60" ~  "OTHER GRANTS AND CONTRACTS", #other
    Category == "63" ~  "INVESTMENT INCOME", # other
    Category == "66" ~ "PROCEEDS,INVESTMENT MATURITIES" , #dropped
    Category == "72" ~ "BOND ISSUE PROCEEDS",  #dropped
    Category == "75" ~  "INTER-AGENCY RECEIPTS ",  #dropped
    Category == "76" ~  "TRANSFER IN FROM OUT FUNDS",  #other
    Category == "78new" ~  "ALL OTHER SOURCES" ,
    Category == "79" ~   "COOK COUNTY IGT", #dropped
    Category == "98" ~  "PRIOR YEAR REFUNDS", #dropped
                 T ~ "Check Me!"

  ) )


exp_long <- pivot_longer(ff_exp, exp_402:exp_970 , names_to = c("type", "Category"), values_to = "Dollars", names_sep = "_") %>% 
  rename(Year = fy ) %>% 
  mutate(Category_name = 
           case_when(
             Category == "402" ~ "AGING" ,
             Category == "406" ~ "AGRICULTURE", 
             Category == "416" ~ "CENTRAL MANAGEMENT",
             Category == "418" ~ "CHILDREN AND FAMILY SERVICES", 
             Category == "420" ~ "COMMERCE AND ECONOMIC OPPORTUNITY",
             Category == "422" ~ "NATURAL RESOURCES" ,
             Category == "426" ~ "CORRECTIONS",
             Category == "427" ~ "EMPLOYMENT SECURITY" ,
             Category == "444" ~ "HUMAN SERVICES" ,
             Category == "448" ~ "Innovation and Technology", # AWM added fy2022
             Category == "478" ~ "HEALTHCARE & FAM SER NET OF MEDICAID", 
             Category == "482" ~ "PUBLIC HEALTH", 
             Category == "492" ~ "REVENUE", 
             Category == "494" ~ "TRANSPORTATION" ,
             Category == "532" ~ "ENVIRONMENTAL PROTECT AGENCY" ,
             Category == "557" ~ "IL STATE TOLL HIGHWAY AUTH" ,
             Category == "684" ~ "IL COMMUNITY COLLEGE BOARD", 
             Category == "691" ~ "IL STUDENT ASSISTANCE COMM" ,
             Category == "900" ~ "NOT IN FRAME",
             Category == "901" ~ "STATE PENSION CONTRIBUTION",
             Category == "903" ~ "DEBT SERVICE",
             Category == "904" ~ "State Employee Healthcare",
             Category == "910" ~ "LEGISLATIVE"  ,
             Category == "920" ~ "JUDICIAL" ,
             Category == "930" ~ "ELECTED OFFICERS" , 
             Category == "940" ~ "OTHER HEALTH-RELATED", 
             Category == "941" ~ "PUBLIC SAFETY" ,
             Category == "942" ~ "ECON DEVT & INFRASTRUCTURE" ,
             Category == "943" ~ "CENTRAL SERVICES",
             Category == "944" ~ "BUS & PROFESSION REGULATION" ,
             Category == "945" ~ "MEDICAID" ,
             Category == "946" ~ "CAPITAL IMPROVEMENT" , 
             Category == "948" ~ "OTHER DEPARTMENTS" ,
             Category == "949" ~ "OTHER BOARDS & COMMISSIONS" ,
             Category == "959" ~ "K-12 EDUCATION" ,
             Category == "960" ~ "UNIVERSITY EDUCATION",
             Category == "970" ~ "Local Govt Transfers",
             T ~ "CHECK ME!")
           )

 # write_csv(exp_long, "expenditures_recoded_long_FY22.csv")
 # write_csv(rev_long, "revenue_recoded_long_FY22.csv")

aggregated_totals_long <- rbind(rev_long, exp_long)
aggregated_totals_long
year_totals <- aggregated_totals_long %>% 
  group_by(type, Year) %>% 
  summarize(Dollars = sum(Dollars, na.rm = TRUE)) %>% 
  pivot_wider(names_from = "type", values_from = Dollars) %>% 
  rename(
         Expenditures = exp,
         Revenue = rev) %>%  
  mutate(Gap = Revenue - Expenditures)
# creates variable for the Gap each year

year_totals
# write_csv(aggregated_totals_long, "aggregated_totals.csv")

Graphs

Graphs made from aggregated_totals_long dataframe.

aggregated_totals_long %>%  
  filter(type == "exp") %>% # uses only expenditures
  ggplot(aes(x = Year, y = Dollars, group = Category)) +
  geom_line()+
    xlab("Year") + 
    ylab("Millions of Dollars")  +
    ggtitle("Illinois Expenditures by Category")

aggregated_totals_long %>%  
  filter(type == "rev") %>% #uses only revenues
  ggplot(aes(x = Year, y = Dollars, group = Category, label = Category_name)) +
  geom_line()+
    xlab("Year") + 
    ylab("Millions of Dollars")  +
    ggtitle("Illinois Revenues by Category")

year_totals %>%  
  ggplot() +
  # geom_smooth adds regression line, graphed first so it appears behind line graph
  geom_smooth(aes(x = Year, y = Revenue), color = "light green", method = "lm", se = FALSE) + 
  geom_smooth(aes(x = Year, y = Expenditures), color = "gray", method = "lm", se = FALSE) +
  
  # line graph of revenue and expenditures
  geom_line(aes(x = Year, y = Revenue), color = "green4") +
  geom_line(aes(x = Year, y = Expenditures), color = "black") +
  
  # labels
    theme_bw() +
  scale_y_continuous(labels = comma)+
  xlab("Year") + 
  ylab("Millions of Dollars")  +
  ggtitle("Illinois Expenditures and Revenue Totals, 1998-2022")

Expenditure and revenue amounts in millions of dollars, with and without labels:

exp_long %>%
  filter(Year == 2021) %>%
  #mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
 # select(-c(Year, `Total Expenditures`)) %>%
  arrange(desc(`Dollars`)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`))+ 
  coord_flip() +
    xlab("Expenditure Categories") +
  ylab("Millions of Dollars") +
    theme_bw()

exp_long %>%
  filter(Year == 2021) %>%
  #mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
 # select(-c(Year, `Total Expenditures`)) %>%
  arrange(desc(`Dollars`)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category, `Dollars`), y = `Dollars`))+ 
  coord_flip() +
    xlab("Expenditure Categories") +
  ylab("Millions of Dollars") +
    theme_bw()

rev_long %>%
  filter(Year == 2021) %>%
  #mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
 # select(-c(Year, `Total Expenditures`)) %>%
  arrange(desc(`Dollars`)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`))+ 
  coord_flip() +
    xlab("Revenue Categories") +
  ylab("Millions of Dollars") +
    theme_bw()

rev_long %>%
  filter(Year == 2021) %>%
  #mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
 # select(-c(Year, `Total Expenditures`)) %>%
  arrange(desc(`Dollars`)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category, `Dollars`), y = `Dollars`))+ 
  coord_flip() +
    xlab("Revenue Categories") +
  ylab("Millions of Dollars") +
    theme_bw()

Expenditure and revenues when focusing on largest categories and combining others into “All Other Expenditures(Revenues)”:

exp_long %>%
  filter( Year == 2021) %>%
  mutate(rank = rank(Dollars),
        Category_name = ifelse(rank > 13, Category_name, 'All Other Expenditures')) %>%
 # select(-c(Year, Dollars, rank)) %>%
  arrange(desc(Dollars)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`), fill = "light green")+ 
  coord_flip() +
    xlab("") +
    theme_bw()

rev_long %>%
  filter( Year == 2021) %>%
  mutate(rank = rank(Dollars),
        Category_name = ifelse(rank > 10, Category_name, 'All Other Expenditures')) %>%
 # select(-c(Year, Dollars, rank)) %>%
  arrange(desc(Dollars)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`), fill = "light blue")+ 
  coord_flip() +
    xlab("") +
    theme_bw()

Keeping the top 13 categories and grouping the rest to All Other Expenditures(Revenues). Shown as a percentage of total expenditures(revenues)

exp_long %>%
  filter( Year == 2021) %>%
  mutate(`Total Expenditures` = sum(Dollars, na.rm = TRUE),
        `Percent of Total Expenditures` = round((Dollars / `Total Expenditures`*100), 2),
        rank = rank(-Dollars),
        Category = ifelse(rank <= 13, Category, 'All Other Expenditures')) %>%
  select(-c(Year, `Total Expenditures`, rank)) %>%
  arrange(desc(`Percent of Total Expenditures`)) %>%

  ggplot() + 
  geom_col(aes(x = fct_reorder(Category, `Percent of Total Expenditures`), y = `Percent of Total Expenditures`), fill = "light green")+ 
  coord_flip() +
    xlab("") +
      ylab("Percent of Total Expenditure") +
    theme_bw()


exp_long %>%
  filter( Year == 2021) %>%
  mutate(`Total Expenditures` = sum(Dollars, na.rm = TRUE),
        `Percent of Total Expenditures` = round((Dollars / `Total Expenditures`*100), 2),
        rank = rank(-Dollars),
        Category_name = ifelse(rank <= 13, Category_name, 'All Other Expendiures')) %>%
  select(-c(Year, `Total Expenditures`, rank)) %>%
  arrange(desc(`Percent of Total Expenditures`)) %>%

  ggplot() + 
  geom_col(aes(x = fct_reorder(Category_name, `Percent of Total Expenditures`), y = `Percent of Total Expenditures`), fill = "light green")+ 
  coord_flip() +
  xlab("")+
    ylab("Percent of Total Expenditure") +
    theme_bw()

CAGR / Growth

STILL FOR FY21 calculations, will update later.

Each year, you will need to update the CAGR formulas!

calc_cagr is a function created for calculating the CAGRs for different spans of time.

# function for calculating the CAGR
calc_cagr <- function(df, n) {
  df <- exp_long %>%
    select(-type) %>%
    arrange(Category_name, Year) %>%
    group_by(Category_name) %>%
    mutate(cagr = ((`Dollars` / lag(`Dollars`, n)) ^ (1 / n)) - 1)

  return(df)
}

# This works for one variable at a time
cagr_23 <- calc_cagr(exp_long, 23) %>% 
  # group_by(Category) %>%
  summarize(cagr_23 = round(sum(cagr*100, na.rm = TRUE), 2))

cagr23_precovid <- exp_long %>%
  filter(Year <= 2019) %>%
  calc_cagr(21) %>% 
  summarize(cagr_21 = round(sum(cagr*100, na.rm = TRUE), 2))



cagr_10 <- calc_cagr(exp_long, 10) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_10 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_5 <- calc_cagr(exp_long, 5) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_5 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_3 <- calc_cagr(exp_long, 3) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_3 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_2 <- calc_cagr(exp_long, 2) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_2 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_1 <- calc_cagr(exp_long, 1) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_1 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

CAGR_expenditures_summary <- data.frame(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_23 ) %>% 
  select(-c(Category_name.1, Category_name.2, Category_name.3, Category_name.4, Category_name.5 )) %>% 
  rename("Expenditure Category" = Category_name, "1 Year CAGR" = cagr_1, "2 Year CAGR" = cagr_2, "3 Year CAGR" = cagr_3, "5 Year CAGR" = cagr_5, "10 Year CAGR" = cagr_10,"23 Year CAGR" = cagr_23 )

CAGR_expenditures_summary
# to have it as a csv, uncomment the line below
write_csv(CAGR_expenditures_summary, "CAGR_expenditures_summary.csv")
calc_cagr <- function(df, n) {
  df <- rev_long %>%
    arrange(Category_name, Year) %>%
    group_by(Category_name) %>%
    mutate(cagr = ((Dollars / lag(Dollars, n)) ^ (1 / n)) - 1)

  return(df)
}

# This works for one variable at a time
cagr_23 <- calc_cagr(rev_long, 23) %>% 
     # group_by(Category) %>%
  summarize(cagr_23 = round(sum(cagr*100, na.rm = TRUE), 2))

cagr_10 <- calc_cagr(rev_long, 10) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_10 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_5 <- calc_cagr(rev_long, 5) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_5 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_3 <- calc_cagr(rev_long, 3) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_3 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_2 <- calc_cagr(rev_long, 2) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_2 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

 cagr_1 <- calc_cagr(rev_long, 1) %>% 
  filter(Year == 2021) %>%
  summarize(cagr_1 = case_when(Year == 2021 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

CAGR_revenue_summary <- data.frame(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_23) %>%   
  select(-c(Category_name.1, Category_name.2, Category_name.3, Category_name.4, Category_name.5 )) %>% 
  rename("Revenue Category" = Category_name, "1 Year CAGR" = cagr_1, "2 Year CAGR" = cagr_2, "3 Year CAGR" = cagr_3, "5 Year CAGR" = cagr_5, "10 Year CAGR" = cagr_10,"23 Year CAGR" = cagr_23 )

CAGR_revenue_summary
# to have it as a csv, uncomment the line below
write_csv(CAGR_revenue_summary, "CAGR_revenue_summary.csv")

rm(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_23)

Expenditure and Revenue Growth using a lag formula:

 exp_long %>% 
  group_by(Category_name) %>% 
  mutate(Growth = ((Dollars) - lag(Dollars))/lag(Dollars) *100) %>% 
  summarize(Growth = round(mean(Growth, na.rm = TRUE), 2))
 rev_long %>% 
  group_by(Category_name) %>% 
  mutate(Growth = ((Dollars) - lag(Dollars))/lag(Dollars) *100) %>% 
  summarize(Growth = round(mean(Growth, na.rm = TRUE), 2))

Change from Previous Year

Final column not done yet

These calculations are still for fy20 to fy21 change!! Will be updated later for fy 22 document.

revenue_change <- rev_long %>%
  select(-c(type,Category)) %>%
  filter(Year > 2019) %>%
  pivot_wider(names_from = Year , values_from = Dollars,   names_prefix = "Dollars_") %>%
  mutate("Change from 2020 to 2021" = Dollars_2021 - Dollars_2020,
         "Percent Change from 2020 to 2021" = (Dollars_2021 -Dollars_2020)/Dollars_2020) %>%
  left_join(CAGR_revenue_summary, by = c("Category_name" = "Revenue Category")) %>% 
  select(-c(Dollars_2020,`1 Year CAGR`:`10 Year CAGR`))

revenue_change
expenditure_change <- exp_long %>%
  select(-c(type,Category)) %>%
  filter(Year > 2019) %>%
  pivot_wider(names_from = Year , values_from = Dollars,   names_prefix = "Dollars_") %>%
  mutate("Change from 2020 to 2021" = Dollars_2021 - Dollars_2020,
         "Percent Change from 2020 to 2021" = (Dollars_2021 -Dollars_2020)/Dollars_2020) %>%
  left_join(CAGR_expenditures_summary, by = c("Category_name" = "Expenditure Category")) %>% 
  select(-c(Dollars_2020,`1 Year CAGR`:`10 Year CAGR`))

expenditure_change

Create summary file

Saves main items in one excel file named summary_file.xlsx. Delete eval=FALSE to run on local computer.

#install.packages("openxlsx")
library(openxlsx)

dataset_names <- list('rev_long' = rev_long, 'exp_long' = exp_long, 
                      `Table 1` = expenditure_change, `Table 2` = revenue_change,
                      'Table 4.a' = CAGR_revenue_summary, 'Table 4.b' = CAGR_expenditures_summary, 
                      'year_totals' = year_totals)

write.xlsx(dataset_names, file = 'summary_file_AWM_v2.xlsx')